In [ ]:
import pandas as pd
import geopandas as gpd
import shapely
import plotly.express as px
import numpy as np
import json

from datetime import datetime

from sodapy import Socrata


import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook"
In [ ]:
boroughs = gpd.read_file('data\Borough Boundaries.zip')

lines_gdf = gpd.read_file('data\Subway Lines.zip')
stations = pd.read_csv('data\MTA_Subway_Stations_20240214.csv')
stations_gdf = gpd.GeoDataFrame(data=stations, 
                       geometry=stations['Georeference']
                       .apply(shapely.wkt.loads))

delays = pd.read_csv('data\MTA_Subway_Trains_Delayed__Beginning_2020_20240214.csv')

Subway Data¶

General subway data analytics¶

In [ ]:
#total number of stations in the system
len(stations_gdf['Station ID'].unique())
Out[ ]:
493
In [ ]:
# ADA: 0 if not accessable; 1 if fully accessable; 2 if partially accessable (one direction)
stations_gdf['ADA_labels'] = stations_gdf.ADA.map({0:"non-compliant",
                                                   1:"ADA-compliant",
                                                   2:"partially compliant"})

stations_gdf.rename(columns={'Stop Name':'Station'}, inplace=True)
In [ ]:
# percent of stations that are ADA compliant in 2024
stations_gdf.ADA_labels.value_counts(normalize=True)
Out[ ]:
non-compliant          0.697581
ADA-compliant          0.286290
partially compliant    0.016129
Name: ADA_labels, dtype: float64
In [ ]:
px.set_mapbox_access_token(open("mapbox_token").read())

stations_gdf['marker_size'] = 3

# plot subway stations
fig = px.scatter_mapbox(stations_gdf, 
                                lon=stations_gdf['GTFS Longitude'], 
                                lat=stations_gdf['GTFS Latitude'],
                                hover_data={'Station':True, 'GTFS Latitude':False,'GTFS Longitude':False, 'ADA_labels':False},
                                color="ADA_labels", 
                                labels={"ADA_labels": "Subway Station ADA Compliance"},
                                size = 'marker_size',
                                size_max = 5,
                                width=1200, height=1000,
                                zoom=10)


# plot subway lines
lats = []
lons = []
names = []
for feature, name in zip(lines_gdf.geometry, lines_gdf.name):
    if isinstance(feature, shapely.geometry.linestring.LineString):
        linestrings = [feature]
    elif isinstance(feature, shapely.geometry.multilinestring.MultiLineString):
        linestrings = feature.geoms
    else:
        continue
    for linestring in linestrings:
        x, y = linestring.xy
        lats = np.append(lats, y)
        lons = np.append(lons, x)
        names = np.append(names, [name]*len(y))
        lats = np.append(lats, None)
        lons = np.append(lons, None)
        names = np.append(names, None)
names = list(names)
fig.add_trace(px.line_mapbox(lat=lats, lon=lons, 
                            labels={'hover_data_0':'lines'},
                            hover_name=names, 
                            # hover_data=[names],
                            #  color=names,
                            # hover_data={'lat': False, 'lon': False, 'hover_data_0':names}
                             ).data[0])
fig.update_traces(line={'width': 1})

# order of traces on map
fig.data = fig.data[::-1]

fig.show()

Delays per subway line¶

In [ ]:
# calculate length of each subway line
subway_line_list = ['1', '2', '3', '4', '5', '6', '7', 'A', 'B', 'C', 'D', 'E', 'F',
       'G', 'J-Z', 'L', 'M', 'N', 'Q', 'R']

subway_line_length = {}

# loop through each subway line
for line in subway_line_list:
    subway_line_length[line] = 0 # initialize 0 length for each subway line

    # loop through each row in lines_gdf
    for row in lines_gdf.iterrows():

        # add the shape length if the current line is in the lines_gdf name
        if str(row[1][1]).find(line) != -1:
            subway_line_length[line] += row[1][4]

subway_line_length

# testing above code works
# lines_gdf[lines_gdf.name.str.contains('A')].sum()[4]
# lines_gdf[lines_gdf.name.str.contains('1')].sum()[4]
Out[ ]:
{'1': 80688.49153911599,
 '2': 140478.29424623805,
 '3': 100918.98820500403,
 '4': 135540.12091733,
 '5': 161968.29187684198,
 '6': 84055.2400603,
 '7': 54718.379981503,
 'A': 197918.53738361102,
 'B': 154071.13705718605,
 'C': 108888.57429322302,
 'D': 160600.92606391906,
 'E': 113068.626969864,
 'F': 152976.558610879,
 'G': 57745.32480826,
 'J-Z': 73964.34249342198,
 'L': 53486.00531743,
 'M': 107209.47175261998,
 'N': 116975.52983082198,
 'Q': 102089.028988739,
 'R': 134804.671964215}
In [ ]:
# calculations for delays per line

# total number of delays for each line, by month 
# note - there are multiple types of delays, so summing them all togehter to get a total count for each month by line
delays.groupby(by=['line','month']).sum()

# average monthly number of delays for each line between 2020-01 and 2023-12
# taking average of total monthly delays (pervious step)
delays_by_line = delays.groupby(by=['line','month']).sum().groupby(by='line').mean().round(2).drop(columns=['day_type'])
delays_by_line = delays_by_line[0:-1] # drop stationwide delays

# average number of monthly delays per line between 2020-01 and 2023-12
avg_delays = delays_by_line.mean()[0]

# TODO: need to change 'JZ' to 'J-Z' in dataframe

# delays.line = ['J-Z' if delays.line == 'JZ' else delays.line]

# convert subway_line_length to dataframe & merge with delays_by_line dataframe
line_length_df = pd.DataFrame.from_dict(subway_line_length, orient='index', columns=['length']
                                        ).reset_index().rename(columns={'index':'line'}).round(2)

delays_by_line.reset_index()
delays_by_line = pd.merge(left=delays_by_line, left_on='line',
                            right=line_length_df, right_on='line',
                            how='outer')

# calculate delays per subway line length
delays_by_line['delays_per_length'] = round(delays_by_line.delays / delays_by_line.length,4)


delays_by_line.rename(columns={'line':'Subway Line', 
                                'delays': 'Average number of delays',
                                'length': 'Subway line length',}, inplace=True)
In [ ]:
# lines with the most overall delays
delays_by_line.sort_values(by='Average number of delays', ascending=False).head()
Out[ ]:
Subway Line Average number of delays Subway line length delays_per_length
17 N 2802.90 116975.53 0.0240
7 A 2749.15 197918.54 0.0139
12 F 2422.29 152976.56 0.0158
5 6 2317.96 84055.24 0.0276
11 E 2224.12 113068.63 0.0197
In [ ]:
# lines with the most delays per length
delays_by_line.sort_values(by='delays_per_length', ascending=False).head()
Out[ ]:
Subway Line Average number of delays Subway line length delays_per_length
5 6 2317.96 84055.24 0.0276
6 7 1449.40 54718.38 0.0265
17 N 2802.90 116975.53 0.0240
0 1 1627.23 80688.49 0.0202
15 L 1066.65 53486.01 0.0199

Elevator outages¶

In [ ]:
# MTA NYCT Subway Elevator and Escalator Availability: Beginning 2015
MyAppToken = 'm2MgET0gzpDk2Is9e4TOVpdZ3'

query = f"""SELECT * 
            WHERE total_outages>0 AND equipment_type='Elevator'
            ORDER BY month DESC 
            LIMIT 15000"""

client = Socrata("data.ny.gov",
                 MyAppToken)
results = client.get("rc78-7x78", query=query)
elevator_outages_df = pd.DataFrame.from_records(results)
In [ ]:
# cleaning up data into better formats...

elevator_outages_df.time_since_major_improvement.replace('N/A',np.NaN, inplace=True)

elevator_outages_df.month = pd.to_datetime(elevator_outages_df.month,format='%Y-%m-%dT%H:%M:%S.000')

# filter to just look at 2021 through 2023
elevator_outages_df = elevator_outages_df[elevator_outages_df.month >'2019-12-01']

columns = ['total_outages',
       'scheduled_outages', 'unscheduled_outages', 'entrapments',
       'time_since_major_improvement', 'am_peak_availability',
       'am_peak_hours_available', 'am_peak_total_hours',
       'pm_peak_availability', 'pm_peak_hours_available',
       'pm_peak_total_hours', '_24_hour_availability',
       '_24_hour_hours_available', '_24_hour_total_hours',
       'station_mrn', 'station_complex_mrn']

for column in columns:
    elevator_outages_df[column] = elevator_outages_df[column].astype('float')

elevator_outages_df.head()
Out[ ]:
month borough equipment_type equipment_code total_outages scheduled_outages unscheduled_outages entrapments time_since_major_improvement am_peak_availability ... pm_peak_availability pm_peak_hours_available pm_peak_total_hours _24_hour_availability _24_hour_hours_available _24_hour_total_hours station_name station_mrn station_complex_name station_complex_mrn
0 2023-12-01 Queens Elevator EL469 2.0 2.0 0.0 0.0 5.0 1.000000 ... 1.000000 124.000000 124.0 0.994915 740.216667 744.0 COURTSQ-XTN-G/7 281.0 Court Sq (E,G,M,7) 606.0
1 2023-12-01 Manhattan Elevator EL116 3.0 3.0 0.0 0.0 25.0 0.983199 ... 1.000000 124.000000 124.0 0.982303 730.833333 744.0 190ST-8AV-A 145.0 190 St - Station 145.0
2 2023-12-01 Manhattan Elevator EL443 7.0 5.0 2.0 0.0 84.0 0.983871 ... 0.981048 121.650000 124.0 0.975358 725.666667 744.0 LEXINGTONAV/63ST-63S-F/Q 223.0 Lexington Av/63 St - Station 223.0
3 2023-12-01 Manhattan Elevator EL217 5.0 2.0 3.0 2.0 283.0 0.961962 ... 0.975806 121.000000 124.0 0.969377 721.216667 744.0 14ST-UNIONSQ-LEX-4/5/6 406.0 14 St-Union Sq (L,N,Q,R,4,5,6) 602.0
4 2023-12-01 Bronx Elevator EL515 2.0 1.0 1.0 0.0 3.0 1.000000 ... 0.983602 121.966667 124.0 0.993347 739.050000 744.0 E149ST-PEL-6 373.0 E 149 St - Station 373.0

5 rows × 22 columns

In [ ]:
# elevator outages by station
# first calculate one number for each station by month (some stations have multiple elevators, so calculating the average across all elevators in a station)
# then calculate the avaregae outage for each station from 2021 through 2023 
avg_outages_by_station = elevator_outages_df.groupby(by=['station_mrn', 'month']).mean().reset_index().groupby(by='station_mrn').mean().round(2).reset_index()
avg_outages_by_station.columns

avg_outages_by_station = avg_outages_by_station[[
        'station_mrn', 
        'total_outages', 
        'scheduled_outages','unscheduled_outages', 
    #    'entrapments', 'time_since_major_improvement',
       'am_peak_availability', 
    #    'am_peak_hours_available', 'am_peak_total_hours', 
       'pm_peak_availability',
    #    'pm_peak_hours_available', 'pm_peak_total_hours',
       '_24_hour_availability', 
    #    '_24_hour_hours_available', '_24_hour_total_hours', 
    #    'station_complex_mrn'
       ]]

avg_outages_by_station.head()
Out[ ]:
station_mrn total_outages scheduled_outages unscheduled_outages am_peak_availability pm_peak_availability _24_hour_availability
0 2.0 3.01 1.77 1.24 0.99 0.99 0.98
1 9.0 4.18 1.72 2.46 0.97 0.98 0.97
2 11.0 3.53 2.28 1.25 0.97 0.98 0.96
3 12.0 3.65 1.96 1.68 0.98 0.98 0.97
4 15.0 3.46 2.40 1.06 0.97 0.98 0.97
In [ ]:
# convert station_mrn to int type for merge
avg_outages_by_station.station_mrn = avg_outages_by_station.station_mrn.astype('int')

# filter out some columns
stations_gdf_temp = stations_gdf[[
                                # 'GTFS Stop ID', 
                                  'Station ID', 
                                #   'Division', 'Line',
                                'GTFS Longitude', 'GTFS Latitude',
                                  'Station', 'Borough', 'Daytime Routes', 
                                  'Structure', 
                                #   'North Direction Label', 'South Direction Label',
                                # 'ADA', 'ADA Northbound', 'ADA Southbound',
                                # 'geometry', 
                                'ADA_labels']]

station_outages = pd.merge(left=stations_gdf_temp, left_on='Station ID',
                           right=avg_outages_by_station.groupby(by='station_mrn').mean().reset_index(), right_on='station_mrn',
                           how='left')

# only plotting ADA stations
station_outages = station_outages[station_outages.ADA_labels == 'ADA-compliant']

station_outages.total_outages.replace(np.NaN, 0, inplace=True)
In [ ]:
# stations with the most average monthly elevator outages
station_outages.sort_values(by='total_outages', ascending=False).head(5)
Out[ ]:
Station ID GTFS Longitude GTFS Latitude Station Borough Daytime Routes Structure ADA_labels station_mrn total_outages scheduled_outages unscheduled_outages am_peak_availability pm_peak_availability _24_hour_availability
430 430 -73.893064 40.824073 Simpson St Bx 2 5 Elevated ADA-compliant 430.0 6.50 2.94 3.56 0.95 0.95 0.94
392 392 -73.937594 40.804138 125 St M 4 5 6 Subway ADA-compliant 392.0 5.66 2.29 3.36 0.97 0.96 0.96
310 310 -73.972323 40.793919 96 St M 1 2 3 Subway ADA-compliant 310.0 5.38 2.69 2.69 0.97 0.97 0.96
421 421 -73.866256 40.877850 Gun Hill Rd Bx 2 5 Elevated ADA-compliant 421.0 4.97 2.67 2.30 0.95 0.94 0.93
221 221 -73.942836 40.754203 21 St-Queensbridge Q F Subway ADA-compliant 221.0 4.96 2.01 2.95 0.98 0.97 0.97
In [ ]:
# stations with elevators that are least available in the AM and PM peak hours
station_outages.sort_values(by='am_peak_availability', ascending=True).head(5)
Out[ ]:
Station ID GTFS Longitude GTFS Latitude Station Borough Daytime Routes Structure ADA_labels station_mrn total_outages scheduled_outages unscheduled_outages am_peak_availability pm_peak_availability _24_hour_availability
359 359 -73.947642 40.632836 Flatbush Av-Brooklyn College Bk 2 5 Subway ADA-compliant 359.0 4.47 2.47 2.00 0.95 0.95 0.94
421 421 -73.866256 40.877850 Gun Hill Rd Bx 2 5 Elevated ADA-compliant 421.0 4.97 2.67 2.30 0.95 0.94 0.93
430 430 -73.893064 40.824073 Simpson St Bx 2 5 Elevated ADA-compliant 430.0 6.50 2.94 3.56 0.95 0.95 0.94
114 115 -74.002578 40.739777 8 Av M L Subway ADA-compliant 115.0 4.47 2.68 1.79 0.96 0.96 0.95
317 317 -73.987495 40.755290 Times Sq-42 St M 1 2 3 Subway ADA-compliant 317.0 4.43 2.38 2.05 0.96 0.97 0.96
In [ ]:
# stations with elevators that are least available in the AM and PM peak hours
station_outages.sort_values(by='pm_peak_availability', ascending=True).head(5)
Out[ ]:
Station ID GTFS Longitude GTFS Latitude Station Borough Daytime Routes Structure ADA_labels station_mrn total_outages scheduled_outages unscheduled_outages am_peak_availability pm_peak_availability _24_hour_availability
421 421 -73.866256 40.877850 Gun Hill Rd Bx 2 5 Elevated ADA-compliant 421.0 4.97 2.67 2.30 0.95 0.94 0.93
430 430 -73.893064 40.824073 Simpson St Bx 2 5 Elevated ADA-compliant 430.0 6.50 2.94 3.56 0.95 0.95 0.94
195 195 -73.825798 40.685951 Ozone Park-Lefferts Blvd Q A Elevated ADA-compliant 195.0 4.78 2.03 2.75 0.96 0.95 0.95
359 359 -73.947642 40.632836 Flatbush Av-Brooklyn College Bk 2 5 Subway ADA-compliant 359.0 4.47 2.47 2.00 0.95 0.95 0.94
97 98 -73.941126 40.700260 Flushing Av Bk J M Elevated ADA-compliant 98.0 4.26 2.30 1.96 0.96 0.96 0.95
In [ ]:
# plot subway stations
station_outages['marker_size'] = 3

fig = px.scatter_mapbox(station_outages, 
                                lon=station_outages['GTFS Longitude'], 
                                lat=station_outages['GTFS Latitude'],
                                hover_data={'Station':True, 'GTFS Latitude':False,'GTFS Longitude':False, 'ADA_labels':False, 'total_outages':True, 'marker_size':False},
                                color="_24_hour_availability", 
                                # color='total_outages',
                                color_continuous_scale='Reds_r',
                                size = 'marker_size',
                                size_max = 8,
                                # size='total_outages',
                                # labels={"ADA_labels": "Subway Station ADA Compliance"},
                                width=1200, height=1000,
                                zoom=10)


# plot subway lines
lats = []
lons = []
names = []
for feature, name in zip(lines_gdf.geometry, lines_gdf.name):
    if isinstance(feature, shapely.geometry.linestring.LineString):
        linestrings = [feature]
    elif isinstance(feature, shapely.geometry.multilinestring.MultiLineString):
        linestrings = feature.geoms
    else:
        continue
    for linestring in linestrings:
        x, y = linestring.xy
        lats = np.append(lats, y)
        lons = np.append(lons, x)
        names = np.append(names, [name]*len(y))
        lats = np.append(lats, None)
        lons = np.append(lons, None)
        names = np.append(names, None)
names = list(names)
fig.add_trace(px.line_mapbox(lat=lats, lon=lons, 
                            labels={'hover_data_0':'lines'},
                            hover_name=names, 
                            # hover_data=[names],
                            #  color=names,
                            # hover_data={'lat': False, 'lon': False, 'hover_data_0':names}
                             ).data[0])
fig.update_traces(line={'width': 1})

# order of traces on map
fig.data = fig.data[::-1]

fig.show()

Ambulatory Disability Census Data¶

Get GeoJSON from GeoPandas dataframe and load JSON¶

In [ ]:
# ny state shape file
ny_gdf = gpd.read_file('data\census_data\cb_2022_36_tract_500k.zip')

# drop irrelevent columns in shapefile
drop_columns = ['STATEFP', 'COUNTYFP', 'TRACTCE', 'GEOID', 'NAME',
       'NAMELSAD', 'STUSPS', 'STATE_NAME', 'LSAD', 'AWATER']
ny_gdf.drop(columns=drop_columns, inplace=True)

# filter to NYC counties
nyc_counties = ['New York County', 'Queens County', 'Richmond County',
                'Kings County', 'Bronx County']
nyc_gdf = ny_gdf[ny_gdf['NAMELSADCO'].isin(nyc_counties)]
In [ ]:
nyc_gdf.head()
Out[ ]:
AFFGEOID NAMELSADCO ALAND geometry
31 1400000US36005013500 Bronx County 167401 POLYGON ((-73.90508 40.82711, -73.90385 40.828...
32 1400000US36005009200 Bronx County 310475 POLYGON ((-73.85773 40.83356, -73.85465 40.833...
33 1400000US36005005400 Bronx County 196516 POLYGON ((-73.88389 40.82981, -73.88235 40.831...
34 1400000US36005036501 Bronx County 95827 POLYGON ((-73.88832 40.84518, -73.88723 40.844...
35 1400000US36005044902 Bronx County 156698 POLYGON ((-73.87136 40.90004, -73.86900 40.901...
In [ ]:
nyc_gdf.to_file("nyc_tracts_compressed.json", driver='GeoJSON')

Prepare GeoJSON¶

In [ ]:
# prepare GeoJSON
with open('nyc_tracts_compressed.json') as f:
  nyc_tracts = json.load(f)
In [ ]:
#Check the keys of the json
nyc_tracts["features"][0].keys()
Out[ ]:
dict_keys(['type', 'properties', 'geometry'])
In [ ]:
#We have the first level keys. The properties key is what we are interested in. Let's dig deeper there
#We'll see that almost all the keys corresponding to the columns in the geopandas object are present here.
nyc_tracts["features"][0]['properties'].keys()
Out[ ]:
dict_keys(['AFFGEOID', 'NAMELSADCO', 'ALAND'])

Load ambulatory data¶

In [ ]:
# demographic data with population and ambulatory population for NYC census tracts in 2022
demo_2022 = pd.read_csv('data\census_data\ACSDT5Y2022.B18105-Data.csv', header=1)

# drop irrelevent columns
demo_2022 = demo_2022[demo_2022.columns.drop(list(demo_2022.filter(regex='Margin')))]

# calculate percentage of population with ambulatory difficulty in each census tract
demo_2022['With_Ambulatory_Difficulty'] = demo_2022.filter(regex='With an ambulatory').sum(axis=1)
demo_2022['Perc_with_Ambulatory_Difficulty'] = demo_2022['With_Ambulatory_Difficulty'] / demo_2022['Estimate!!Total:']

demo_2022 = demo_2022[['Geography', 'Estimate!!Total:', 'With_Ambulatory_Difficulty', 'Perc_with_Ambulatory_Difficulty']]

# drop census tracts that have no population
demo_2022 = demo_2022[demo_2022['Estimate!!Total:'] > 0]

# drop census tracts with percent of pop over 30% - likely issues with data (too small census tracts or nursing homes)
demo_2022 = demo_2022[demo_2022.Perc_with_Ambulatory_Difficulty < .3]

demo_2022.head()
Out[ ]:
Geography Estimate!!Total: With_Ambulatory_Difficulty Perc_with_Ambulatory_Difficulty
1 1400000US36005000200 4646 298 0.064141
2 1400000US36005000400 6001 344 0.057324
3 1400000US36005001600 5684 643 0.113125
4 1400000US36005001901 1987 178 0.089582
5 1400000US36005001902 1760 221 0.125568
In [ ]:
# merge demographic data with shapefile
demo_gdf = nyc_gdf.merge(demo_2022, left_on='AFFGEOID', right_on='Geography', how='left')
In [ ]:
fig = px.choropleth_mapbox(demo_2022, geojson=nyc_tracts, 
                           locations='Geography', 
                           color='Perc_with_Ambulatory_Difficulty',
                           color_continuous_scale="Viridis_r",
                           opacity=0.25,
                           featureidkey="properties.AFFGEOID",
                           center = {"lat": 40.75, "lon": -74}, 
                           zoom=10,
                           height = 800, width = 1200,
                          )

fig.update_geos(fitbounds="locations", visible=False)
fig.show()

Combine plots¶

In [ ]:
# plot subway stations
stations_gdf['marker_size'] = 3

fig = px.scatter_mapbox(stations_gdf, 
                                lon=stations_gdf['GTFS Longitude'], 
                                lat=stations_gdf['GTFS Latitude'],
                                hover_data={'Station':True, 'GTFS Latitude':False,'GTFS Longitude':False, 'ADA_labels':False},
                                color="ADA_labels", 
                                color_continuous_scale="BuGn",
                                labels={"ADA_labels": "Subway Station ADA Compliance"},
                                size = 'marker_size',
                                size_max = 5,
                                width=1200, height=1000,
                                zoom=10)

# fig = px.scatter_mapbox(station_outages, 
#                                 lon=station_outages['GTFS Longitude'], 
#                                 lat=station_outages['GTFS Latitude'],
#                                 hover_data={'Station':True, 'GTFS Latitude':False,'GTFS Longitude':False, 'ADA_labels':False, 'total_outages':True, 'marker_size':False},
#                                 color="_24_hour_availability", 
#                                 # color='total_outages',
#                                 color_continuous_scale='Reds_r',
#                                 size = 'marker_size',
#                                 size_max = 8,
#                                 # size='total_outages',
#                                 # labels={"ADA_labels": "Subway Station ADA Compliance"},
#                                 width=1200, height=1000,
#                                 zoom=10)

# plot subway lines
lats = []
lons = []
names = []
for feature, name in zip(lines_gdf.geometry, lines_gdf.name):
    if isinstance(feature, shapely.geometry.linestring.LineString):
        linestrings = [feature]
    elif isinstance(feature, shapely.geometry.multilinestring.MultiLineString):
        linestrings = feature.geoms
    else:
        continue
    for linestring in linestrings:
        x, y = linestring.xy
        lats = np.append(lats, y)
        lons = np.append(lons, x)
        names = np.append(names, [name]*len(y))
        lats = np.append(lats, None)
        lons = np.append(lons, None)
        names = np.append(names, None)
names = list(names)
fig.add_trace(px.line_mapbox(lat=lats, lon=lons, 
                            labels={'hover_data_0':'lines'},
                            hover_name=names, 
                            # hover_data=[names],
                            #  color=names,
                            # hover_data={'lat': False, 'lon': False, 'hover_data_0':names}
                             ).data[0])
fig.update_traces(line={'width': 1})

fig.add_trace(px.choropleth_mapbox(demo_2022, geojson=nyc_tracts, 
                           locations='Geography', 
                           color='Perc_with_Ambulatory_Difficulty',
                           color_continuous_scale="BuGn",
                           opacity=0.25,
                           featureidkey="properties.AFFGEOID",
                           center = {"lat": 40.75, "lon": -74}, 
                           zoom=10,
                           height = 800, width = 1200,
                          ).data[0])


# order of traces on map
fig.data = fig.data[::-1]

fig.show()